This file is used to generate a dataset containing only ORS and IFE basal cells.
library(dplyr)
library(patchwork)
library(ggplot2)
.libPaths()
## [1] "/usr/local/lib/R/library"
In this section, we set the global settings of the analysis. We will store data there :
save_name = "ors_ifeb"
out_dir = "."
We load the sample information :
sample_info = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_sample_info.rds"))
project_names_oi = sample_info$project_name
graphics::pie(rep(1, nrow(sample_info)),
col = sample_info$color,
labels = sample_info$project_name)
Here are custom colors for each cell type :
color_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_color_markers.rds"))
data.frame(cell_type = names(color_markers),
color = unlist(color_markers)) %>%
ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
ggplot2::geom_point(pch = 21, size = 5) +
ggplot2::scale_fill_manual(values = unlist(color_markers), breaks = names(color_markers)) +
ggplot2::theme_classic() +
ggplot2::theme(legend.position = "none",
axis.line = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_text(angle = 30, hjust = 1))
We load the combined dataset containing all cell types from all samples :
sobj = readRDS(paste0(out_dir, "/../../3_combined/hs_hd_sobj.rds"))
sobj
## An object of class Seurat
## 20003 features across 12111 samples within 1 assay
## Active assay: RNA (20003 features, 2000 variable features)
## 6 dimensional reductions calculated: RNA_pca, RNA_pca_38_tsne, RNA_pca_38_umap, harmony, harmony_38_umap, harmony_38_tsne
We represent cells in the tSNE :
name2D = "harmony_38_tsne"
We smooth cell type annotation at a cluster level :
cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
prop.table(., margin = 2) %>%
apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
levels(sobj$cell_type)[cluster_type])
sobj$cluster_type = cluster_type[sobj$seurat_clusters]
We look gene markers expression level, cell annotation and cluster-smoothed annotation on the projection, to locate ors_ifeb cells :
ors_ifeb_markers = c("KRT16", "EHF", "ALDH3A1")
ors_ifeb_cell_type = c("ORS", "IFE basal")
color_markers[!(names(color_markers) %in% ors_ifeb_cell_type)] = "gray92"
# Feature Plot
plot_list = lapply(ors_ifeb_markers, FUN = function(one_gene) {
p = Seurat::FeaturePlot(sobj, reduction = name2D,
features = one_gene) +
Seurat::NoAxes() +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(aspect.ratio = 1,
plot.subtitle = element_text(hjust = 0.5))
return(p)
})
# Cell type annotation
plot_list[[length(plot_list) + 1]] = Seurat::DimPlot(sobj, group.by = "cell_type",
cols = color_markers, reduction = name2D,
order = save_name) +
ggplot2::labs(title = "Cell annotation",
subtitle = paste0(sum(sobj$cell_type %in% ors_ifeb_cell_type),
" cells")) +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
# Cluster-smoothed annotation
plot_list[[length(plot_list) + 1]] = Seurat::DimPlot(sobj,
reduction = name2D,
group.by = "cluster_type") +
ggplot2::scale_color_manual(values = c(unname(unlist(color_markers[ors_ifeb_cell_type])),
rep("gray92", length(color_markers) - length(ors_ifeb_cell_type))),
breaks = c(ors_ifeb_cell_type, setdiff(names(color_markers), ors_ifeb_cell_type))) +
ggplot2::labs(title = "Cluster annotation",
subtitle = paste0(sum(sobj$cluster_type %in% ors_ifeb_cell_type),
" cells")) +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1)
Due to clustering that badly separate ORS from IFE, there is a mis-selection of cells of interest : false positive. However, annotation smoothing adds a lot of (all) false negative and clean the annotation. We extract cells of interest based on clustering. Then, we generate a new cluster to remove false positive.
We extract cells of interest based on the clustering :
sobj = subset(sobj, cluster_type %in% ors_ifeb_cell_type)
sobj
## An object of class Seurat
## 20003 features across 3532 samples within 1 assay
## Active assay: RNA (20003 features, 2000 variable features)
## 6 dimensional reductions calculated: RNA_pca, RNA_pca_38_tsne, RNA_pca_38_umap, harmony, harmony_38_umap, harmony_38_tsne
We remove all things that were calculated based on the full atlas :
sobj = Seurat::DietSeurat(sobj)
sobj
## An object of class Seurat
## 20003 features across 3532 samples within 1 assay
## Active assay: RNA (20003 features, 2000 variable features)
We keep a subset of meta.data and reset levels :
sobj@meta.data = sobj@meta.data[, c("orig.ident", "nCount_RNA", "nFeature_RNA", "log_nCount_RNA",
"project_name", "sample_identifier", "sample_type",
"laboratory", "location", "Seurat.Phase", "cyclone.Phase",
"percent.mt", "percent.rb", "cell_type")]
sobj$orig.ident = factor(sobj$orig.ident, levels = levels(sample_info$project_name))
sobj$project_name = factor(sobj$project_name, levels = levels(sample_info$project_name))
sobj$sample_identifier = factor(sobj$sample_identifier, levels = levels(sample_info$sample_identifier))
sobj$sample_type = factor(sobj$sample_type, levels = levels(sample_info$sample_type))
summary(sobj@meta.data)
## orig.ident nCount_RNA nFeature_RNA log_nCount_RNA project_name
## 2021_31:396 Min. : 711 Min. : 500 Min. : 6.572 2021_31:396
## 2021_36:163 1st Qu.: 3500 1st Qu.:1076 1st Qu.: 8.161 2021_36:163
## 2021_41:628 Median : 9494 Median :2574 Median : 9.159 2021_41:628
## 2022_03:995 Mean :12083 Mean :2593 Mean : 8.971 2022_03:995
## 2022_14:667 3rd Qu.:16744 3rd Qu.:3738 3rd Qu.: 9.726 2022_14:667
## 2022_01:324 Max. :74969 Max. :7123 Max. :11.225 2022_01:324
## 2022_02:359 2022_02:359
## sample_identifier sample_type laboratory location
## HS_1:396 HS:2849 Length:3532 Length:3532
## HS_2:163 HD: 683 Class :character Class :character
## HS_3:628 Mode :character Mode :character
## HS_4:995
## HS_5:667
## HD_1:324
## HD_2:359
## Seurat.Phase cyclone.Phase percent.mt percent.rb
## Length:3532 Length:3532 Min. : 0.0000 Min. : 2.257
## Class :character Class :character 1st Qu.: 0.2583 1st Qu.:22.168
## Mode :character Mode :character Median : 3.6962 Median :27.101
## Mean : 3.9238 Mean :26.440
## 3rd Qu.: 5.9263 3rd Qu.:31.222
## Max. :19.8012 Max. :46.017
##
## cell_type
## IFE basal :1749
## ORS :1506
## IFE granular spinous: 165
## HF-SCs : 58
## proliferative : 24
## sebocytes : 20
## (Other) : 10
How many cells by sample ?
table(sobj$project_name)
##
## 2021_31 2021_36 2021_41 2022_03 2022_14 2022_01 2022_02
## 396 163 628 995 667 324 359
We represent this information as a barplot :
aquarius::plot_barplot(df = table(sobj$project_name,
sobj$cell_type) %>%
as.data.frame.table() %>%
`colnames<-`(c("project_name", "cell_type", "nb_cells")),
x = "project_name", y = "nb_cells", fill = "cell_type",
position = position_stack()) +
ggplot2::scale_fill_manual(values = color_markers,
breaks = names(color_markers),
name = "Cell type")
We normalize gene expression for remaining cells :
sobj = Seurat::NormalizeData(sobj,
normalization.method = "LogNormalize")
sobj = Seurat::FindVariableFeatures(sobj, nfeatures = 3000)
sobj = Seurat::ScaleData(sobj)
sobj
## An object of class Seurat
## 20003 features across 3532 samples within 1 assay
## Active assay: RNA (20003 features, 3000 variable features)
We perform a PCA :
sobj = Seurat::RunPCA(sobj,
assay = "RNA",
reduction.name = "RNA_pca",
npcs = 100,
seed.use = 1337L)
sobj
## An object of class Seurat
## 20003 features across 3532 samples within 1 assay
## Active assay: RNA (20003 features, 3000 variable features)
## 1 dimensional reduction calculated: RNA_pca
We choose the number of dimensions such that they summarize 60 % of the variability :
stdev = sobj@reductions[["RNA_pca"]]@stdev
stdev_prop = cumsum(stdev)/sum(stdev)
ndims = which(stdev_prop > 0.60)[1]
ndims
## [1] 48
We can visualize this on the elbow plot :
elbow_p = Seurat::ElbowPlot(sobj, ndims = 100, reduction = "RNA_pca") +
ggplot2::geom_point(x = ndims, y = stdev[ndims], col = "red")
x_text = ggplot_build(elbow_p)$layout$panel_params[[1]]$x$get_labels() %>% as.numeric()
elbow_p = elbow_p +
ggplot2::scale_x_continuous(breaks = sort(c(x_text, ndims)), limits = c(0, 100))
x_color = ifelse(ggplot_build(elbow_p)$layout$panel_params[[1]]$x$get_labels() %>%
as.numeric() %>% round(., 2) == round(ndims, 2), "red", "black")
elbow_p = elbow_p +
ggplot2::theme_classic() +
ggplot2::theme(axis.text.x = element_text(color = x_color))
elbow_p
We generate a tSNE and a UMAP with 48 principal components :
sobj = Seurat::RunTSNE(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_tsne"))
sobj = Seurat::RunUMAP(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_umap"))
We can visualize the two representations :
tsne = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("RNA_pca_", ndims, "_umap")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We generate a clustering from the PCA :
reduction_name = "RNA_pca"
sobj = Seurat::FindNeighbors(sobj, reduction = reduction_name)
sobj = Seurat::FindClusters(sobj, resolution = 1.7)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3532
## Number of edges: 111658
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7631
## Number of communities: 23
## Elapsed time: 0 seconds
cluster_plot = Seurat::DimPlot(sobj, label = TRUE,
reduction = paste0(reduction_name, "_", ndims, "_umap")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1)
cluster_plot
We look at key markers for ors_ifeb cells, and for eventual contamination :
plot_list = lapply(c("nFeature_RNA", ors_ifeb_markers,
c("SPINK5", "KRT1", "KRTDAP", "CIDEA")), FUN = function(one_gene) {
Seurat::FeaturePlot(sobj, features = one_gene,
reduction = paste0(reduction_name, "_", ndims, "_umap")) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() + Seurat::NoLegend()
})
patchwork::wrap_plots(plot_list, nrow = 2)
We select clusters based on KRTDAP expression :
mean_score_thresh = 1
sobj$KRTDAP_expr = Seurat::FetchData(sobj, "KRTDAP")
score_by_clusters = sobj@meta.data %>%
dplyr::select(seurat_clusters, KRTDAP_expr) %>%
dplyr::group_by(seurat_clusters) %>%
dplyr::summarise(avg_score = mean(KRTDAP_expr)) %>%
as.data.frame()
ggplot2::ggplot(score_by_clusters, aes(x = seurat_clusters, y = avg_score)) +
ggplot2::geom_point() +
ggplot2::geom_hline(yintercept = mean_score_thresh, col = "red") +
ggplot2::labs(x = "Cluster ID", y = "Mean KRTDAP expression",
title = "Proportion of cells by cluster") +
ggplot2::theme_classic() +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"))
We remove clusters above threshold :
clusters_to_remove = score_by_clusters %>%
dplyr::filter(avg_score > mean_score_thresh) %>%
dplyr::pull(seurat_clusters) %>%
as.character()
sobj = subset(sobj, idents = clusters_to_remove, invert = TRUE)
sobj
## An object of class Seurat
## 20003 features across 3416 samples within 1 assay
## Active assay: RNA (20003 features, 3000 variable features)
## 3 dimensional reductions calculated: RNA_pca, RNA_pca_48_tsne, RNA_pca_48_umap
We remove all reductions in this dataset :
sobj = Seurat::DietSeurat(sobj, assays = "RNA")
sobj
## An object of class Seurat
## 20003 features across 3416 samples within 1 assay
## Active assay: RNA (20003 features, 3000 variable features)
How many cells by sample ?
table(sobj$project_name)
##
## 2021_31 2021_36 2021_41 2022_03 2022_14 2022_01 2022_02
## 368 160 607 981 637 318 345
We represent this information as a barplot :
aquarius::plot_barplot(df = table(sobj$project_name,
sobj$cell_type) %>%
as.data.frame.table() %>%
`colnames<-`(c("project_name", "cell_type", "nb_cells")),
x = "project_name", y = "nb_cells", fill = "cell_type",
position = position_fill()) +
ggplot2::scale_fill_manual(values = color_markers,
breaks = names(color_markers),
name = "Cell type")
No contamination remaining !
We remove genes that are expressed in less than 5 cells :
sobj = aquarius::filter_features(sobj, min_cells = 5)
## [1] 20003 3416
## [1] 16683 3416
sobj
## An object of class Seurat
## 16683 features across 3416 samples within 1 assay
## Active assay: RNA (16683 features, 2837 variable features)
We normalize the count matrix for remaining cells :
sobj = Seurat::NormalizeData(sobj,
normalization.method = "LogNormalize")
sobj = Seurat::FindVariableFeatures(sobj, nfeatures = 2000)
sobj = Seurat::ScaleData(sobj)
sobj
## An object of class Seurat
## 16683 features across 3416 samples within 1 assay
## Active assay: RNA (16683 features, 2000 variable features)
We perform a PCA :
sobj = Seurat::RunPCA(sobj,
assay = "RNA",
reduction.name = "RNA_pca",
npcs = 100,
seed.use = 1337L)
sobj
## An object of class Seurat
## 16683 features across 3416 samples within 1 assay
## Active assay: RNA (16683 features, 2000 variable features)
## 1 dimensional reduction calculated: RNA_pca
We choose the number of dimensions such that they summarize 35 % of the variability :
stdev = sobj@reductions[["RNA_pca"]]@stdev
stdev_prop = cumsum(stdev)/sum(stdev)
ndims = which(stdev_prop > 0.35)[1]
ndims
## [1] 20
We can visualize this on the elbow plot :
elbow_p = Seurat::ElbowPlot(sobj, ndims = 100, reduction = "RNA_pca") +
ggplot2::geom_point(x = ndims, y = stdev[ndims], col = "red")
x_text = ggplot_build(elbow_p)$layout$panel_params[[1]]$x$get_labels() %>% as.numeric()
elbow_p = elbow_p +
ggplot2::scale_x_continuous(breaks = sort(c(x_text, ndims)), limits = c(0, 100))
x_color = ifelse(ggplot_build(elbow_p)$layout$panel_params[[1]]$x$get_labels() %>%
as.numeric() %>% round(., 2) == round(ndims, 2), "red", "black")
elbow_p = elbow_p +
ggplot2::theme_classic() +
ggplot2::theme(axis.text.x = element_text(color = x_color))
elbow_p
We generate a tSNE and a UMAP with 20 principal components :
sobj = Seurat::RunTSNE(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_tsne"))
sobj = Seurat::RunUMAP(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_umap"))
We can visualize the two representations :
tsne = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("RNA_pca_", ndims, "_umap")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
There is a batch-effect mainly in the right population : due to the disease ?
We remove batch-effect using Harmony :
`%||%` = function(lhs, rhs) {
if (!is.null(x = lhs)) {
return(lhs)
} else {
return(rhs)
}
}
set.seed(1337L)
sobj = harmony::RunHarmony(object = sobj,
group.by.vars = "project_name",
plot_convergence = TRUE,
reduction = "RNA_pca",
assay.use = "RNA",
reduction.save = "harmony",
max.iter.harmony = 20,
project.dim = FALSE)
From this batch-effect removed projection, we generate a tSNE and a UMAP.
sobj = Seurat::RunUMAP(sobj,
seed.use = 1337L,
dims = 1:ndims,
reduction = "harmony",
reduction.name = paste0("harmony_", ndims, "_umap"),
reduction.key = paste0("harmony_", ndims, "umap_"))
sobj = Seurat::RunTSNE(sobj,
dims = 1:ndims,
seed.use = 1337L,
reduction = "harmony",
reduction.name = paste0("harmony_", ndims, "_tsne"),
reduction.key = paste0("harmony", ndims, "tsne_"))
These are the corrected UMAP and tSNE :
tsne = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("harmony_", ndims, "_tsne")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - harmony - tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("harmony_", ndims, "_umap")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - harmony - UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We will keep the tSNE from Harmony :
reduction = "harmony"
name2D = paste0("harmony_", ndims, "_tsne")
We generate a clustering :
sobj = Seurat::FindNeighbors(sobj, reduction = reduction, dims = 1:ndims)
sobj = Seurat::FindClusters(sobj, resolution = 1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3416
## Number of edges: 133015
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7801
## Number of communities: 12
## Elapsed time: 0 seconds
dimplot_clusters = Seurat::DimPlot(sobj, reduction = name2D, label = TRUE) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1)
dimplot_clusters
We can represent the 4 quality metrics :
plot_list = Seurat::FeaturePlot(sobj, reduction = name2D,
combine = FALSE, pt.size = 0.5,
features = c("percent.mt", "percent.rb", "nFeature_RNA", "log_nCount_RNA"))
plot_list = lapply(plot_list, FUN = function(one_plot) {
one_plot +
Seurat::NoAxes() +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(aspect.ratio = 1)
})
patchwork::wrap_plots(plot_list, nrow = 1)
We can visualize the two batch-effect corrected representations :
plot_list = lapply(c(paste0("harmony_", ndims, "_tsne"),
paste0("harmony_", ndims, "_umap")), FUN = function(one_proj) {
Seurat::DimPlot(sobj, group.by = "project_name",
reduction = one_proj) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle(one_proj) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
})
patchwork::wrap_plots(plot_list, ncol = 2)
We also visualize cell types :
plot_list = lapply(c(paste0("harmony_", ndims, "_tsne"),
paste0("harmony_", ndims, "_umap")), FUN = function(one_proj) {
Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = one_proj) +
ggplot2::scale_color_manual(values = color_markers,
breaks = names(color_markers)) +
Seurat::NoAxes() + ggplot2::ggtitle(one_proj) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
})
patchwork::wrap_plots(plot_list, ncol = 2)
We can represent clusters, split by sample of origin :
plot_list = aquarius::plot_split_dimred(sobj,
reduction = name2D,
split_by = "sample_identifier",
group_by = "seurat_clusters",
split_color = setNames(sample_info$color,
nm = sample_info$sample_identifier),
group_color = aquarius::gg_color_hue(length(levels(sobj$seurat_clusters))),
main_pt_size = 0.5, bg_pt_size = 0.5)
plot_list[[length(plot_list) + 1]] = dimplot_clusters
patchwork::wrap_plots(plot_list, ncol = 4) +
patchwork::plot_layout(guides = "collect") &
ggplot2::theme(legend.position = "none")
We make a heatmap to see clusters distribution among samples :
cluster_markers = c("KRT14",
# IFE basal-related
"KRT16", "MGP", "KRT6C", "CST6",
# IFE basal
"GPX2", "C1QTNF12", "PTN", "CLEC2B", "TGFBI",
# QC metrics
"TOP2A", "MCM5",
"percent.mt", "percent.rb", "log_nCount_RNA")
ht_annot = Seurat::FetchData(sobj, slot = "data", vars = cluster_markers) %>%
as.data.frame()
ht_annot$clusters = sobj$seurat_clusters
ht_annot = ht_annot %>%
dplyr::group_by(clusters) %>%
dplyr::summarise_all(funs('mean' = mean)) %>%
as.data.frame() %>%
dplyr::select(-clusters) %>%
`colnames<-`(c(cluster_markers))
head(ht_annot)
## KRT14 KRT16 MGP KRT6C CST6 GPX2 C1QTNF12
## 1 4.594290 0.06099838 0.006447587 0.02332942 0.02215209 0.95327612 1.08262394
## 2 5.822020 4.55832660 0.105068361 2.11952477 1.00988691 0.06301549 0.09281149
## 3 4.542902 0.06463788 0.020509349 0.02724677 0.02977043 0.68825415 1.13116006
## 4 4.643753 0.05179121 0.001583540 0.01026374 0.01917485 0.70179295 0.55828331
## 5 5.020896 0.11226035 0.012526752 0.01021032 0.02190074 0.33410133 0.15685593
## 6 4.973422 1.63007168 1.777878336 0.21746173 0.17254779 0.03252731 0.01617996
## PTN CLEC2B TGFBI TOP2A MCM5 percent.mt
## 1 1.155960932 0.362149066 0.14961623 0.018487882 0.14410646 5.8876590
## 2 0.096835341 0.004336307 0.00000000 0.004419252 0.01873488 0.2179762
## 3 1.683696801 0.088212797 0.08970869 0.017327792 0.11218501 5.8619901
## 4 0.270831349 0.726225494 0.52769949 0.013035064 0.08841249 6.3534252
## 5 0.003856703 1.001295689 0.63708980 0.006790053 0.08129976 7.8340236
## 6 0.167698275 0.013468035 0.02512972 0.051624334 0.14729860 3.3225015
## percent.rb log_nCount_RNA
## 1 30.57707 9.476068
## 2 24.84052 7.802923
## 3 27.67438 9.311167
## 4 30.87513 9.257627
## 5 26.55977 9.587476
## 6 23.74584 10.040056
color_fun = function(one_gene) {
gene_range = range(ht_annot[, one_gene])
gene_palette = circlize::colorRamp2(colors = c("#FFFFFF", aquarius::color_gene[-1]),
breaks = seq(from = gene_range[1], to = gene_range[2],
length.out = length(aquarius::color_gene)))
return(gene_palette)
}
ha = ComplexHeatmap::HeatmapAnnotation(df = ht_annot,
which = "column",
show_legend = TRUE,
col = setNames(nm = cluster_markers,
lapply(cluster_markers, FUN = color_fun)),
annotation_name_side = "left")
ht = aquarius::plot_prop_heatmap(df = sobj@meta.data[, c("sample_identifier", "seurat_clusters")],
bottom_annotation = ha,
cluster_rows = TRUE,
prop_margin = 1,
row_names_gp = grid::gpar(names = sample_info$sample_identifier,
col = sample_info$color,
fontface = "bold"),
row_title = "Sample",
column_title = "Cluster")
ComplexHeatmap::draw(ht,
merge_legends = TRUE)
We also look at genes of interest on the projection :
plot_list = lapply(cluster_markers, FUN = function(one_gene) {
p = Seurat::FeaturePlot(sobj, features = one_gene,
pt.size = 0.2, reduction = name2D) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1)
return(p)
})
patchwork::wrap_plots(plot_list, ncol = 5)
We save the Seurat object :
saveRDS(sobj, file = paste0(out_dir, "/", save_name, "_sobj.rds"))
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
##
## locale:
## [1] C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_3.3.5 patchwork_1.1.2 dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] softImpute_1.4 graphlayouts_0.7.0
## [3] pbapply_1.4-2 lattice_0.20-41
## [5] haven_2.3.1 vctrs_0.3.8
## [7] usethis_2.0.1 dynwrap_1.2.1
## [9] blob_1.2.1 survival_3.2-13
## [11] prodlim_2019.11.13 dynutils_1.0.5
## [13] later_1.3.0 DBI_1.1.1
## [15] R.utils_2.11.0 SingleCellExperiment_1.8.0
## [17] rappdirs_0.3.3 uwot_0.1.8
## [19] dqrng_0.2.1 jpeg_0.1-8.1
## [21] zlibbioc_1.32.0 pspline_1.0-18
## [23] pcaMethods_1.78.0 mvtnorm_1.1-1
## [25] htmlwidgets_1.5.4 GlobalOptions_0.1.2
## [27] future_1.22.1 UpSetR_1.4.0
## [29] laeken_0.5.2 leiden_0.3.3
## [31] clustree_0.4.3 parallel_3.6.3
## [33] scater_1.14.6 irlba_2.3.3
## [35] DEoptimR_1.0-9 tidygraph_1.1.2
## [37] Rcpp_1.0.9 readr_2.0.2
## [39] KernSmooth_2.23-17 carrier_0.1.0
## [41] promises_1.1.0 gdata_2.18.0
## [43] DelayedArray_0.12.3 limma_3.42.2
## [45] graph_1.64.0 RcppParallel_5.1.4
## [47] Hmisc_4.4-0 fs_1.5.2
## [49] RSpectra_0.16-0 fastmatch_1.1-0
## [51] ranger_0.12.1 digest_0.6.25
## [53] png_0.1-7 sctransform_0.2.1
## [55] cowplot_1.0.0 DOSE_3.12.0
## [57] here_1.0.1 TInGa_0.0.0.9000
## [59] ggraph_2.0.3 pkgconfig_2.0.3
## [61] GO.db_3.10.0 DelayedMatrixStats_1.8.0
## [63] gower_0.2.1 ggbeeswarm_0.6.0
## [65] iterators_1.0.12 DropletUtils_1.6.1
## [67] reticulate_1.26 clusterProfiler_3.14.3
## [69] SummarizedExperiment_1.16.1 circlize_0.4.15
## [71] beeswarm_0.4.0 GetoptLong_1.0.5
## [73] xfun_0.35 bslib_0.3.1
## [75] zoo_1.8-10 tidyselect_1.1.0
## [77] reshape2_1.4.4 purrr_0.3.4
## [79] ica_1.0-2 pcaPP_1.9-73
## [81] viridisLite_0.3.0 rtracklayer_1.46.0
## [83] rlang_1.0.2 hexbin_1.28.1
## [85] jquerylib_0.1.4 dyneval_0.9.9
## [87] glue_1.4.2 RColorBrewer_1.1-2
## [89] matrixStats_0.56.0 stringr_1.4.0
## [91] lava_1.6.7 europepmc_0.3
## [93] DESeq2_1.26.0 recipes_0.1.17
## [95] labeling_0.3 harmony_0.1.0
## [97] httpuv_1.5.2 class_7.3-17
## [99] BiocNeighbors_1.4.2 DO.db_2.9
## [101] annotate_1.64.0 jsonlite_1.7.2
## [103] XVector_0.26.0 bit_4.0.4
## [105] mime_0.9 aquarius_0.1.5
## [107] Rsamtools_2.2.3 gridExtra_2.3
## [109] gplots_3.0.3 stringi_1.4.6
## [111] processx_3.5.2 gsl_2.1-6
## [113] bitops_1.0-6 cli_3.0.1
## [115] batchelor_1.2.4 RSQLite_2.2.0
## [117] randomForest_4.6-14 tidyr_1.1.4
## [119] data.table_1.14.2 rstudioapi_0.13
## [121] org.Mm.eg.db_3.10.0 GenomicAlignments_1.22.1
## [123] nlme_3.1-147 qvalue_2.18.0
## [125] scran_1.14.6 locfit_1.5-9.4
## [127] scDblFinder_1.1.8 listenv_0.8.0
## [129] ggthemes_4.2.4 gridGraphics_0.5-0
## [131] R.oo_1.24.0 dbplyr_1.4.4
## [133] BiocGenerics_0.32.0 TTR_0.24.2
## [135] readxl_1.3.1 lifecycle_1.0.1
## [137] timeDate_3043.102 ggpattern_0.3.1
## [139] munsell_0.5.0 cellranger_1.1.0
## [141] R.methodsS3_1.8.1 proxyC_0.1.5
## [143] visNetwork_2.0.9 caTools_1.18.0
## [145] codetools_0.2-16 Biobase_2.46.0
## [147] GenomeInfoDb_1.22.1 vipor_0.4.5
## [149] lmtest_0.9-38 msigdbr_7.5.1
## [151] htmlTable_1.13.3 triebeard_0.3.0
## [153] lsei_1.2-0 xtable_1.8-4
## [155] ROCR_1.0-7 BiocManager_1.30.10
## [157] scatterplot3d_0.3-41 abind_1.4-5
## [159] farver_2.0.3 parallelly_1.28.1
## [161] RANN_2.6.1 askpass_1.1
## [163] GenomicRanges_1.38.0 RcppAnnoy_0.0.16
## [165] tibble_3.1.5 ggdendro_0.1-20
## [167] cluster_2.1.0 future.apply_1.5.0
## [169] Seurat_3.1.5 dendextend_1.15.1
## [171] Matrix_1.3-2 ellipsis_0.3.2
## [173] prettyunits_1.1.1 lubridate_1.7.9
## [175] ggridges_0.5.2 igraph_1.2.5
## [177] RcppEigen_0.3.3.7.0 fgsea_1.12.0
## [179] remotes_2.4.2 scBFA_1.0.0
## [181] destiny_3.0.1 VIM_6.1.1
## [183] testthat_3.1.0 htmltools_0.5.2
## [185] BiocFileCache_1.10.2 yaml_2.2.1
## [187] utf8_1.1.4 plotly_4.9.2.1
## [189] XML_3.99-0.3 ModelMetrics_1.2.2.2
## [191] e1071_1.7-3 foreign_0.8-76
## [193] withr_2.5.0 fitdistrplus_1.0-14
## [195] BiocParallel_1.20.1 xgboost_1.4.1.1
## [197] bit64_4.0.5 foreach_1.5.0
## [199] robustbase_0.93-9 Biostrings_2.54.0
## [201] GOSemSim_2.13.1 rsvd_1.0.3
## [203] memoise_2.0.0 evaluate_0.18
## [205] forcats_0.5.0 rio_0.5.16
## [207] geneplotter_1.64.0 tzdb_0.1.2
## [209] caret_6.0-86 ps_1.6.0
## [211] DiagrammeR_1.0.6.1 curl_4.3
## [213] fdrtool_1.2.15 fansi_0.4.1
## [215] highr_0.8 urltools_1.7.3
## [217] xts_0.12.1 GSEABase_1.48.0
## [219] acepack_1.4.1 edgeR_3.28.1
## [221] checkmate_2.0.0 scds_1.2.0
## [223] cachem_1.0.6 npsurv_0.4-0
## [225] babelgene_22.3 rjson_0.2.20
## [227] openxlsx_4.1.5 ggrepel_0.9.1
## [229] clue_0.3-60 rprojroot_2.0.2
## [231] stabledist_0.7-1 tools_3.6.3
## [233] sass_0.4.0 nichenetr_1.1.1
## [235] magrittr_2.0.1 RCurl_1.98-1.2
## [237] proxy_0.4-24 car_3.0-11
## [239] ape_5.3 ggplotify_0.0.5
## [241] xml2_1.3.2 httr_1.4.2
## [243] assertthat_0.2.1 rmarkdown_2.18
## [245] boot_1.3-25 globals_0.14.0
## [247] R6_2.4.1 Rhdf5lib_1.8.0
## [249] nnet_7.3-14 RcppHNSW_0.2.0
## [251] progress_1.2.2 genefilter_1.68.0
## [253] statmod_1.4.34 gtools_3.8.2
## [255] shape_1.4.6 HDF5Array_1.14.4
## [257] BiocSingular_1.2.2 rhdf5_2.30.1
## [259] splines_3.6.3 AUCell_1.8.0
## [261] carData_3.0-4 colorspace_1.4-1
## [263] generics_0.1.0 stats4_3.6.3
## [265] base64enc_0.1-3 dynfeature_1.0.0
## [267] smoother_1.1 gridtext_0.1.1
## [269] pillar_1.6.3 tweenr_1.0.1
## [271] sp_1.4-1 ggplot.multistats_1.0.0
## [273] rvcheck_0.1.8 GenomeInfoDbData_1.2.2
## [275] plyr_1.8.6 gtable_0.3.0
## [277] zip_2.2.0 knitr_1.41
## [279] ComplexHeatmap_2.14.0 latticeExtra_0.6-29
## [281] biomaRt_2.42.1 IRanges_2.20.2
## [283] fastmap_1.1.0 ADGofTest_0.3
## [285] copula_1.0-0 doParallel_1.0.15
## [287] AnnotationDbi_1.48.0 vcd_1.4-8
## [289] babelwhale_1.0.1 openssl_1.4.1
## [291] scales_1.1.1 backports_1.2.1
## [293] S4Vectors_0.24.4 ipred_0.9-12
## [295] enrichplot_1.6.1 hms_1.1.1
## [297] ggforce_0.3.1 Rtsne_0.15
## [299] shiny_1.7.1 numDeriv_2016.8-1.1
## [301] polyclip_1.10-0 grid_3.6.3
## [303] lazyeval_0.2.2 Formula_1.2-3
## [305] tsne_0.1-3 crayon_1.3.4
## [307] MASS_7.3-54 pROC_1.16.2
## [309] viridis_0.5.1 dynparam_1.0.0
## [311] rpart_4.1-15 zinbwave_1.8.0
## [313] compiler_3.6.3 ggtext_0.1.0